---
title: "ERDDAP Extraction"
params:
# yml: "meta/erddap_sst.yml"
yml: "meta/erddap_precip.yml"
subtitle: "metadata: *`r basename(params$yml)`*"
execute:
cache: false
---
```{r}
#| label: setup
# dir_extractr = "/share/github/marinebon/extractr"
# dir_extractr = "~/Github/marinebon/extractr" # DEBUG
# devtools::load_all(dir_extractr)
# devtools::install_local(dir_extractr, force = T)
# devtools::install_github("marinebon/extractr", force = T)
librarian:: shelf (
dplyr, DT, fs, glue, here, logger, lubridate, mapview, purrr, RColorBrewer, readr,
sf, stringr, terra, tibble, tidyr, units, yaml,
marinebon/ extractr,
quiet = T)
options (readr.show_col_types = F)
mapviewOptions (
basemaps = "Esri.OceanBasemap" ,
vector.palette = colorRampPalette (brewer.pal (n= 5 , name= "Spectral" )))
if (! exists ("params" ))
# params <- list(yml = here("meta/erddap_sst.yml")) # DEBUG
params <- list (yml = here ("meta/erddap_precip.yml" )) # DEBUG
```
## Polygons
```{r}
#| label: polygons
sanctuaries_rds <- here ("../climate-dashboard/data/sanctuaries.rds" )
buf_pct <- 0.20 # 20% of area
ply_sanctuaries <- readRDS (sanctuaries_rds)
# buffer polygon and get bounding boxes
ply_sanctuaries <- ply_sanctuaries |>
mutate (
area_km2 = st_area (geom) |>
set_units ("km^2" ) |>
as.numeric () |>
round (2 ),
buf_km = (sqrt (area_km2) * buf_pct) |>
round (2 ),
bbox_geom = map2 (geom, area_km2, \(g,a) {
st_sfc (g, crs = 4326 ) |>
st_buffer (buf_km * 1000 ) |>
st_bbox () |>
round (2 ) |>
st_as_sfc () }),
bbox_chr = map_chr (bbox_geom, \(x) { # xmin, ymin, xmax and ymax
st_bbox (x) |>
as.numeric () |>
paste (collapse = "," ) } ) ) |>
unnest (bbox_geom)
bb_sanctuaries <- st_set_geometry (ply_sanctuaries, "bbox_geom" ) |>
select (- geom)
ply_sanctuaries <- select (ply_sanctuaries, - bbox_geom)
# show map
mapView (ply_sanctuaries, zcol = "nms" ) +
mapView (bb_sanctuaries, zcol = "nms" , legend = F)
ply_sanctuaries |>
st_drop_geometry () |>
datatable ()
```
## Dataset
### Metadata
Contents of `r basename(params$yml)` :
```{r}
#| label: yml
#| results: asis
cat (
"```yaml" ,
readLines (params$ yml),
"```" ,
sep = " \n " )
```
### Information
```{r}
#| label: ed_info
meta <- read_yaml (params$ yml)
proc_prod <- path_ext_remove (basename (params$ yml))
is_server <- Sys.info ()["sysname" ] == "Linux"
dir_out <- ifelse (
is_server,
glue ("/share/data/noaa-onms/climate-dashboard-app/{proc_prod}" ),
here (glue ("data_local/{proc_prod}" )))
dir_create (dir_out)
v <- meta$ erddap_variable
(ed <- ed_info (meta$ erddap_url))
times <- ed_dim (ed, "time" )
```
## Polygon years
```{r}
#| label: d_nms_yr_todo
d_nms_yr_todo <- ply_sanctuaries |>
st_drop_geometry () |>
arrange (nms) |>
select (nms) |>
cross_join (
tibble (
year = year (min (times)): year (max (times)))) |>
mutate (
d = map2 (nms, year, \(nms, year) { # nms = "CBNMS"; year = 2025
times_yr <- times[year (times) == year]
tif <- glue ("{dir_out}/{nms}/{year}.tif" )
# if missing tif,return all times
if (! file.exists (tif))
return (list (
time_min = min (times_yr),
time_max = max (times_yr),
n_times = length (times_yr)))
# get times from tif
r <- rast (tif)
times_tif <- time (r)
# if all times done, return NAs
if (all (times_yr %in% times_tif))
return (list (
n_times = 0 ,
time_min = NA ,
time_max= NA ))
# otherwise, return time range missing
i <- ! times_yr %in% times_tif
list (
n_times = sum (i),
time_min = min (times_yr[i]),
time_max = max (times_yr[i]))
}),
time_min = map_vec (d, pluck, "time_min" ),
time_max = map_vec (d, pluck, "time_max" ),
n_times = map_int (d, pluck, "n_times" ) ) |>
select (- d) |>
filter (n_times > 0 ) |>
rowid_to_column ("i" ) |>
relocate (i)
d_nms_yr_todo |>
group_by (nms) %>%
{if (nrow (.) > 0 )
summarize (
.,
time_min = min (time_min, na.rm = T),
time_max = max (time_max, na.rm = T),
n_times = sum (n_times)) else .} |>
datatable (
caption = "Sanctuaries missing available ERDDAP times." ,
rownames = F,
options = list (
pageLength = 5 ,
lengthMenu = c (5 , 50 , nrow (d_nms_yr_todo))))
```
## Extract dataset per polygon year
Using [ `extractr::ed_extract()` ](https://marinebon.github.io/extractr/reference/ed_extract.html) , .
```{r}
#| label: iterate_ed_extract
# rerddap::cache_delete_all()
fxn <- function (i, nms, time_min, time_max, ...){
# i = 1; nms = "CBNMS"; time_min = as.POSIXct("1998-01-01 UTC"); time_max = as.POSIXct("1998-12-01 UTC")
err <- tryCatch ({
log_info ("{sprintf('%03d', i)}: {nms}, {time_min} to {time_max}" )
yr <- year (time_min)
ply <- ply_sanctuaries |>
filter (nms == !! nms)
bb <- bb_sanctuaries |>
filter (nms == !! nms) |>
st_bbox ()
# browser()
# devtools::load_all("~/Github/marinebon/extractr")
extractr:: ed_extract (
ed,
var = v,
sf_zones = ply,
bbox = bb,
mask_tif = F,
rast_tif = glue ("{dir_out}/{nms}/{yr}.tif" ),
zonal_fun = "mean" ,
zonal_csv = glue ("{dir_out}/{nms}/{yr}.csv" ),
dir_nc = glue ("{dir_out}/{nms}/{yr}_nc" ),
keep_nc = F,
n_max_vals_per_req = 1e+05 ,
time_min = time_min,
time_max = time_max,
verbose = T)
return (NA )
}, error = function (e) {
log_error (conditionMessage (e))
return (conditionMessage (e))
})
err
}
res <- d_nms_yr_todo |>
mutate (
error = pmap_chr (list (i, nms, time_min, time_max), fxn))
```
### Successes
```{r}
#| label: success_summary
res |>
mutate (
success = is.na (error)) |>
group_by (success) |>
summarize (
n = n ()) |>
datatable ()
```
### Errors (if any)
```{r}
#| label: error_summary
# rast(glue("{dir_out}/{nms}/{yr}.tif")) |> names()
res |>
filter (! is.na (error)) |>
group_by (nms) |>
summarize (
n_years = n (),
errors = unique (error) |> paste (collapse = " \n\n ---- \n\n " )) |>
datatable ()
```
::: {.content-hidden}
## TODO
`ed_extract()` :
- [x] delete *_nc dir
- [x] differentiate existing done vs todo for given year
- [ ] allow irregular datasets, like `meta/irregular/*.yaml` : `ERROR: x cell sizes are not regular`
- [ ] break up into functions, not exported
`erddap.qmd` :
- [ ] make `ed_extract()` to single MBNMS with features for main vs david; add "ALL" option to `ed_extract()`
- [ ] add buffer to all (and redo)
- [ ] wrap retry with `ed_dim()` too
:::
```{r}
#| label: rm_empty_nc_dirs
#| eval: false
#| echo: false
# find empty directories ending in _nc and delete them
d_dirs <- tibble (
dir = list.dirs (here ("data" ), full.names = T, recursive = T)) |>
filter (str_detect (dir, "_nc$" )) |>
mutate (
n_files = map_int (dir, \(x) {length (list.files (x))}))
# show non-empty directories
d_dirs |>
filter (n_files > 0 ) |>
datatable ()
# delete empty directories
d_dirs |>
filter (n_files == 0 ) |>
pull (dir) |>
walk (\(x) {
message (glue ("Deleting {x}" ))
unlink (x, recursive = T) })
```
::: {.callout-caution collapse="true"}
## R package versions
```{r}
devtools:: session_info ()
```
:::